Short Report of Spectronaut output:

Load the data exported by Spectronaut in pipeline mode.

library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1.9000     ✔ purrr   0.2.4     
## ✔ tibble  1.4.2          ✔ dplyr   0.7.4     
## ✔ tidyr   0.8.0          ✔ stringr 1.3.0     
## ✔ readr   1.1.1          ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# NOTE(review): setwd() ties this report to one machine's directory layout. It is
# kept here because chunks not shown may rely on the working directory it sets.
setwd("~/Desktop/Projects/2018_Laura_HBE_cell_infection_Saureus_side_analysis_SN/20180327_145604_TW_Saureus_HBE_iRT_percentile0-25_SN_normalization")
# Read the Spectronaut report as tab-delimited text (despite the ".xls" extension)
# with the first row as column names. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
d <- read_delim(
  "Report_full Report complex (default) (Normal).xls",
  delim = "\t",
  col_names = TRUE
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   R.Condition = col_character(),
##   R.FileName = col_character(),
##   R.Fraction = col_character(),
##   R.Label = col_character(),
##   R.Replicate = col_integer(),
##   PG.Organisms = col_character(),
##   PG.ProteinAccessions = col_character(),
##   PG.ProteinGroups = col_character(),
##   PG.RunEvidenceCount = col_integer(),
##   PEP.GroupingKey = col_character(),
##   PEP.GroupingKeyType = col_character(),
##   PEP.IsProteotypic = col_character(),
##   PEP.NrOfMissedCleavages = col_integer(),
##   PEP.StrippedSequence = col_character(),
##   PEP.Rank = col_integer(),
##   PEP.RunEvidenceCount = col_integer(),
##   PEP.UsedForProteinGroupQuantity = col_character(),
##   EG.IsDecoy = col_character(),
##   EG.Label = col_character(),
##   EG.Library = col_character()
##   # ... with 19 more columns
## )
## See spec(...) for full column specifications.
# List the column names of the imported report.
names(d)
##  [1] "R.Condition"                       
##  [2] "R.FileName"                        
##  [3] "R.Fraction"                        
##  [4] "R.Label"                           
##  [5] "R.Replicate"                       
##  [6] "PG.Organisms"                      
##  [7] "PG.ProteinAccessions"              
##  [8] "PG.ProteinGroups"                  
##  [9] "PG.Cscore"                         
## [10] "PG.Qvalue"                         
## [11] "PG.RunEvidenceCount"               
## [12] "PEP.GroupingKey"                   
## [13] "PEP.GroupingKeyType"               
## [14] "PEP.IsProteotypic"                 
## [15] "PEP.NrOfMissedCleavages"           
## [16] "PEP.StrippedSequence"              
## [17] "PEP.Rank"                          
## [18] "PEP.RunEvidenceCount"              
## [19] "PEP.Quantity"                      
## [20] "PEP.UsedForProteinGroupQuantity"   
## [21] "EG.iRTPredicted"                   
## [22] "EG.IsDecoy"                        
## [23] "EG.Label"                          
## [24] "EG.Library"                        
## [25] "EG.ModifiedPeptide"                
## [26] "EG.ModifiedSequence"               
## [27] "EG.PrecursorId"                    
## [28] "EG.UserGroup"                      
## [29] "EG.Workflow"                       
## [30] "EG.Identified"                     
## [31] "EG.IsUserPeak"                     
## [32] "EG.IsVerified"                     
## [33] "EG.PEP"                            
## [34] "EG.Qvalue"                         
## [35] "EG.Svalue"                         
## [36] "EG.ApexRT"                         
## [37] "EG.DatapointsPerPeak"              
## [38] "EG.DeltaiRT"                       
## [39] "EG.DeltaRT"                        
## [40] "EG.iRTEmpirical"                   
## [41] "EG.MeanApexRT"                     
## [42] "EG.MeanTailingFactor"              
## [43] "EG.RTPredicted"                    
## [44] "EG.SignalToNoise"                  
## [45] "EG.AvgProfileQvalue"               
## [46] "EG.MaxProfileQvalue"               
## [47] "EG.MinProfileQvalue"               
## [48] "EG.PercentileQvalue"               
## [49] "EG.NormalizationFactor"            
## [50] "EG.ReferenceQuantity (Settings)"   
## [51] "EG.TargetQuantity (Settings)"      
## [52] "EG.TargetReferenceRatio (Settings)"
## [53] "EG.TotalQuantity (Settings)"       
## [54] "EG.UsedForPeptideQuantity"         
## [55] "EG.UsedForProteinGroupQuantity"    
## [56] "EG.Cscore"                         
## [57] "EG.IntCorrScore"                   
## [58] "EG.Noise"                          
## [59] "FG.Charge"                         
## [60] "FG.FragmentCount"                  
## [61] "FG.Id"                             
## [62] "FG.IsotopeLabelType"               
## [63] "FG.Label"                          
## [64] "FG.PrecMz"                         
## [65] "FG.PrecMzCalibrated"               
## [66] "FG.Reference"                      
## [67] "FG.SyntheticIsotopeGroupLabel"     
## [68] "FG.FWHM"                           
## [69] "FG.MeanApexRT"                     
## [70] "FG.MeanTailingFactor"              
## [71] "FG.PrecWindowNumber"               
## [72] "FG.PrecursorSignalToNoise"         
## [73] "FG.SignalToNoise"                  
## [74] "FG.ShapeQualityScore"              
## [75] "FG.MS1PeakArea"                    
## [76] "FG.NormalizedMS1PeakArea"          
## [77] "FG.MS2PeakArea"                    
## [78] "FG.NormalizedMS2PeakArea"          
## [79] "FG.HasPossibleInterference (MS1)"  
## [80] "FG.HasPossibleInterference (MS2)"  
## [81] "FG.Quantity"                       
## [82] "FG.Noise"

Filter the Spectronaut report to ions with a Q-value (EG.Qvalue) <= 0.001 and count the distinct ions (EG.Label) per file and condition.

# Count the distinct ions (EG.Label) per file and condition among the rows that
# pass the EG.Qvalue <= 0.001 cut-off.
# summarise() only drops the innermost grouping level, so without ungroup() the
# result would stay grouped by R.FileName; ungroup() returns a plain tibble.
psm.distinct <- d %>%
  filter(EG.Qvalue <= 0.001) %>%
  group_by(R.FileName, R.Condition) %>%
  summarise(distinct_ions = n_distinct(EG.Label)) %>%
  ungroup()
## Warning: package 'bindrcpp' was built under R version 3.4.4

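Normalize the data. Three versions of the MS2 peak areas are compared:

  1. Raw data (already in the report, FG.MS2PeakArea)
  2. Data normalized in Spectronaut with local normalization over complete profiles (already in the report, FG.NormalizedMS2PeakArea)
  3. Global normalization using the median (FG.MS2PeakArea.median.norm)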
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 96 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 96 rows containing non-finite values (stat_boxplot).

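Calculate the coefficient of variation (CV = standard deviation / mean) of each ion (EG.Label) within each condition, for the raw data and for both normalizations.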

# Coefficient of variation (sd / mean) of every ion (EG.Label) within each
# condition, computed for three intensity columns; NA values are ignored.
# FG.MS2PeakArea.median.norm is not among the columns of the imported report, so
# it must have been added to `d` before this point (presumably by the median
# normalization step).
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
cv.all <- d %>%
  group_by(R.Condition, EG.Label) %>%
  summarise(
    CV_Spectronaut_norm = sd(FG.NormalizedMS2PeakArea, na.rm = TRUE) /
      mean(FG.NormalizedMS2PeakArea, na.rm = TRUE),
    CV_raw_data = sd(FG.MS2PeakArea, na.rm = TRUE) /
      mean(FG.MS2PeakArea, na.rm = TRUE),
    CV_median_norm = sd(FG.MS2PeakArea.median.norm, na.rm = TRUE) /
      mean(FG.MS2PeakArea.median.norm, na.rm = TRUE)
  )

# Reshape to long format: the three CV columns become key/value pairs in
# `normalization_method` / `CV`.
cv.plot <- gather(
  cv.all,
  "CV_raw_data", "CV_Spectronaut_norm", "CV_median_norm",
  key = "normalization_method",
  value = "CV"
)

# Set an explicit level order: raw, Spectronaut-normalized, median-normalized.
cv.plot$normalization_method <- factor(
  cv.plot$normalization_method,
  levels = c("CV_raw_data", "CV_Spectronaut_norm", "CV_median_norm")
)
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).

PCA of the log2-transformed data, for the raw data and for each normalization.

## Warning: package 'FactoMineR' was built under R version 3.4.4
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths